# Import gene expression / count data
counts <- read.table("data/tagseq/processed/counts.txt",
header = TRUE, row.names = 1)
# Trim column names to just the sample name
colnames(counts) <- str_sub(colnames(counts), 1, 7)
# Sort columns by sample name
counts <- counts[, order(colnames(counts))]
# Import sample metadata
sdat <- read_xlsx("data/sample_metadata.xlsx") %>%
mutate(sample = paste0(species, colony, ".", core),
sym = if_else(trt1 == "c", "C", "D"), # recode treatment names
trt = if_else(trt2 == "h", "heat", "ctrl"),
symtrt = interaction(sym, trt),
colony = factor(colony)) %>%
mutate_if(is.character, as.factor) %>%
arrange(sample) %>% # order by sample name
column_to_rownames(var = "sample") # set sample name to rownames
# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = sdat,
design = ~ colony)
# (Only colonies 20, 22, and 26 were in repeat heat stress exp)
# (Only time point at end of heat stress)
hs.dds <- dds[, !is.na(colData(dds)$symtrt)]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(hs.dds) <- formula(~ colony + symtrt)
colData(hs.dds) <- droplevels(colData(hs.dds))
# Number of samples
nsamples <- ncol(counts(hs.dds))
# Number of reads per sample
rps <- qplot(colSums(counts(hs.dds))) +
labs(x = "Reads per sample", y = "Number of samples",
title = "Read counts per sample") +
geom_label(aes(x = 6e5, y = 10, label = paste(nsamples, "samples")))
# Sample Mc20-05 has far more counts than others -- may need to rarefy this sample down to median count?
# Number of genes
# Remove genes with counted less than three times across entire dataset
hs.dds <- hs.dds[ rowSums(counts(hs.dds)) > 3, ]
ngenes <- nrow(counts(hs.dds))
# Number of reads per gene
rpg <- qplot(log10(rowSums(counts(hs.dds)))) +
labs(x = "Reads per gene", y = "Number of genes",
title = "Read counts per gene") +
geom_label(aes(x = 4, y = 750, label = paste(ngenes, "genes")))
plot_grid(rps, rpg)
# Normalize expression data for visualization purposes using VST tranformation
vsd <- vst(hs.dds, blind = FALSE)
(based on normalized gene counts) ** Wright et al. also perform PCoA only including DEGs, and of course this shows more clustering by group… ** Could also consider performing PCoA only including genes with <50% residual variance?
## Run PCA and return data to visualize with ggplot
pcaData <- plotPCA(vsd, intgroup = c( "colony", "sym", "trt"), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
## Plot PCA
ggplot(pcaData, aes(x = PC1, y = PC2, color = colony, shape = sym, size = trt)) +
geom_point(alpha = 0.5) +
labs(title = "PCA: all genes") +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance"))
(based on sample distances)
## Calculate Euclidean distances among samples
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix( sampleDists )
## Plot NMDS
mds <- as.data.frame(colData(vsd)) %>%
cbind(cmdscale(sampleDistMatrix))
ggplot(mds, aes(x = `1`, y = -`2`, color = colony, shape = sym, size = trt)) +
geom_point(alpha = 0.5) + coord_fixed() +
labs(title = "PCoA: all genes")
# optional step to run analysis in parallel on multicore machines
# Here use 8 threads
# This is strongly recommended since the analysis
# can be computationally intensive
cl <- makeCluster(8)
registerDoParallel(cl)
# Get normalized read counts for hs.dds / heat stress experiment
ncounts <- assay(vsd) # normalized counts, previously filtered to remove genes with total count < 3
# Specify variables to consider
form <- ~ (1|colony) + (1|symtrt) + (1|tank)
# Get sample metadata
info <- data.frame(colData(hs.dds))
# Fit model and extract results
# Interpretation: the variance explained by each variables after correcting for all other variables
varPart <- fitExtractVarPartModel(ncounts, form, info)
##
## Finished...
## Total: 100 s
stopCluster(cl)
# sort variables (i.e. columns) by median fraction of variance explained
vp <- sortCols( varPart )
# Violin plot of contribution of each variable to total variance, each point is a gene
#plotVarPart( vp , label.angle = 60)
# This plot shows that for most genes, most variance is residual/unexplained.
# Filter these genes down to look only at those whose variance is explained (>50%) by exp. design
# Filter out the genes where >50% of the variance remains unexplained by colony/symtrt/tank
vpf <- varPart[varPart$Residuals < 0.5, ]
# Violin plot of contribution of each variable to total variance, each point is a gene
plotVarPart( vpf , label.angle = 60, main = "Variance fraction explained")
# Of these genes, calculate median variance explained by colony vs. symbiont/treatment vs. tank effect:
vpf %>% summarize_all(median) %>% knitr::kable(caption = "Median variance explained for all genes")
| colony | symtrt | tank | Residuals |
|---|---|---|---|
| 0.5052399 | 0.0470988 | 0.006324 | 0.4232858 |
For plotting and visualizing the differences that ARE driven by treatments, it will be helpful to remove the effect of colony.
# remove colony effect for downstream visualization
## but note DE testing will still use the un-corrected normailzed count data
vsd2 <- vsd
assay(vsd2) <- limma::removeBatchEffect(assay(vsd), vsd$colony)
hs.dds <- DESeq(hs.dds)
# Test for differential expression between D and C corals in the control treatment
Dctrl.Cctrl <- results(hs.dds, contrast = c("symtrt", "D.ctrl", "C.ctrl"))
# Subset DE genes with adjusted p-value < 0.1
Dctrl.Cctrl.sig <- Dctrl.Cctrl[which(Dctrl.Cctrl$padj < 0.1 ), ]
Dctrl.Cctrl.p05 <- Dctrl.Cctrl[which(Dctrl.Cctrl$pvalue < 0.05), ]
# Get names of DE genes
Dctrl.Cctrl.DEgenes <- rownames(Dctrl.Cctrl.sig)
# Mcavernosa08323: Peroxiredoxin-5 -- protection against oxidative stress
# Summarize DE
as.data.frame(Dctrl.Cctrl) %>%
filter(padj < 0.1) %>%
summarise(total = n(),
up = sum(log2FoldChange > 0),
down = sum(log2FoldChange < 0)) %>%
knitr::kable(caption = "Differential expression")
| total | up | down |
|---|---|---|
| 44 | 17 | 27 |
# Make a heatmap
# Build function to make a heatmap
phm <- function(sig, vsd, grp, lvl, var, ...) {
sig.up <- sig[sig$log2FoldChange > 0, ]
sig.down <- sig[sig$log2FoldChange < 0, ]
# Subset just DE genes from only Dctrl and Cctrl samples
mat <- assay(vsd)[rownames(sig), colData(vsd)[, grp] == lvl]
mat <- mat - rowMeans(mat)
# Order columns (samples) by treatment, and order rows (genes) by up vs. down
anno <- as.data.frame(colData(vsd)[colData(vsd)[, grp] == lvl, c("sym", "trt")])
anno <- anno[order(anno[var]), ]
mat <- mat[c(rownames(sig.up), rownames(sig.down)), rownames(anno)]
pheatmap(..., mat, annotation_col = anno, cluster_cols = F, cluster_rows = F,
color = colorRampPalette(rev(brewer.pal(n=7, name = "RdYlBu")))(100))
}
phm(Dctrl.Cctrl.sig, vsd2, "trt", "ctrl", "sym", main = "D vs. C corals at ambient temp")
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis
# Generate logP values
Dctrl.Cctrl$logP <- -log10(Dctrl.Cctrl$pvalue) * sign(Dctrl.Cctrl$log2FoldChange)
# Write to file
setwd("code/GO_MWU")
write.table(data.frame(gene=rownames(Dctrl.Cctrl), logP=Dctrl.Cctrl$logP),
file = "Dctrl.Cctrl.logP.txt",
row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
commandArgs <- function(...) c("Dctrl.Cctrl.logP.txt", "BP")
source("GO_MWU.R")
# Plot GO_MWU results
setwd("code/GO_MWU")
resultsBP <- gomwuPlot(inFile = "Dctrl.Cctrl.logP.txt",
goAnnotations = "Mcavernosa_gene2go.txt",
goDivision = "BP",
level1 = 0.1, level2 = 0.05, level3 = 0.01, txtsize = 1.2, treeHeight = 0.5)
## GO terms dispayed: 39
## "Good genes" accounted for: 108 out of 202 ( 53% )
# save text representation of results to file
# save(resultsBP, file = "Dctrl.Cctrl.GO_MWU.results.RData")
Red GO terms are significantly upregulated in D vs. C corals at ambient temp. Blue terms are significantly downregulated.
data.frame(Dctrl.Cctrl.sig) %>%
mutate(gene = rownames(.)) %>%
arrange(-log2FoldChange)
## baseMean log2FoldChange lfcSE stat pvalue padj
## 1 15.115757 10.542324 2.1655327 4.868236 1.125989e-06 0.0011524494
## 2 21.692784 8.850311 1.8909236 4.680417 2.862917e-06 0.0014650979
## 3 9.283273 4.540647 1.1529303 3.938353 8.204278e-05 0.0197863735
## 4 12.761862 4.218310 1.2345101 3.416991 6.331736e-04 0.0498502447
## 5 11.039594 3.744560 1.1857360 3.158004 1.588532e-03 0.0848018930
## 6 7.699334 3.385629 0.9087958 3.725401 1.950047e-04 0.0303011046
## 7 19.443029 3.064893 0.8906913 3.441027 5.795112e-04 0.0486190557
## 8 8.133818 3.027349 0.9453694 3.202292 1.363388e-03 0.0786070175
## 9 135.194980 2.867244 0.6621037 4.330506 1.487670e-05 0.0050754325
## 10 10.260505 2.758725 0.8923893 3.091392 1.992202e-03 0.0926826830
## 11 16.397085 2.629848 0.7924404 3.318670 9.044727e-04 0.0617151866
## 12 645.534581 2.578295 0.7507181 3.434438 5.937843e-04 0.0486190557
## 13 13.838916 2.529071 0.6823856 3.706220 2.103755e-04 0.0303011046
## 14 38.464685 2.115802 0.5821003 3.634773 2.782258e-04 0.0316404550
## 15 18.931479 2.067524 0.6405047 3.227961 1.246759e-03 0.0773368225
## 16 7.138343 1.975390 0.6371809 3.100203 1.933881e-03 0.0920617314
## 17 27.360585 1.814705 0.5755370 3.153063 1.615669e-03 0.0848018930
## 18 11.214328 -1.971867 0.5901419 -3.341344 8.337378e-04 0.0609521901
## 19 43.591116 -2.136434 0.5790183 -3.689753 2.244720e-04 0.0303011046
## 20 13.554032 -2.309879 0.4990340 -4.628700 3.679682e-06 0.0015064617
## 21 10.138144 -2.404685 0.6423734 -3.743437 1.815199e-04 0.0303011046
## 22 22.519026 -2.587057 0.6966827 -3.713394 2.044980e-04 0.0303011046
## 23 38.603276 -2.861149 0.8860158 -3.229230 1.241241e-03 0.0773368225
## 24 9.319935 -2.944959 0.9230735 -3.190384 1.420840e-03 0.0786070175
## 25 13.274831 -3.113086 0.8693880 -3.580779 3.425710e-04 0.0333925142
## 26 35.770309 -3.149327 1.0157716 -3.100428 1.932411e-03 0.0920617314
## 27 6.823777 -3.234428 0.8876955 -3.643624 2.688265e-04 0.0316404550
## 28 3.928434 -3.255070 1.0382218 -3.135236 1.717162e-03 0.0878757643
## 29 24.882164 -3.285060 0.8936306 -3.676082 2.368430e-04 0.0303011046
## 30 3.942662 -3.298999 1.0052992 -3.281609 1.032167e-03 0.0681562906
## 31 5.351161 -3.453758 1.1101524 -3.111067 1.864127e-03 0.0920617314
## 32 5.412793 -3.605086 0.9968628 -3.616431 2.986929e-04 0.0321802249
## 33 10.512404 -3.845262 1.2024706 -3.197801 1.384797e-03 0.0786070175
## 34 16.350534 -3.875718 1.0467573 -3.702595 2.134058e-04 0.0303011046
## 35 36.368894 -3.916831 1.1277228 -3.473221 5.142513e-04 0.0464363843
## 36 25.838461 -3.940146 1.2278969 -3.208857 1.332636e-03 0.0786070175
## 37 54.864470 -3.982907 0.9344319 -4.262383 2.022585e-05 0.0059146165
## 38 19.271177 -4.118358 0.8654695 -4.758525 1.950131e-06 0.0013306395
## 39 18.382886 -4.357421 1.2559831 -3.469331 5.217571e-04 0.0464363843
## 40 6.691314 -4.659586 1.3771716 -3.383446 7.158230e-04 0.0542699858
## 41 7.094373 -5.015491 1.3940363 -3.597819 3.208966e-04 0.0328437624
## 42 23.800207 -5.117619 1.3040956 -3.924267 8.699431e-05 0.0197863735
## 43 29.305953 -5.217760 1.5679947 -3.327664 8.757728e-04 0.0617151866
## 44 5.457322 -22.560157 4.1735957 -5.405449 6.464639e-08 0.0001323312
## gene
## 1 Mcavernosa23377
## 2 Mcavernosa23531
## 3 Mcavernosa01063
## 4 Mcavernosa01062
## 5 Mcavernosa00334
## 6 Mcavernosa05961
## 7 Mcavernosa15278
## 8 Mcavernosa01259
## 9 Mcavernosa08323
## 10 Mcavernosa32478
## 11 Mcavernosa05331
## 12 Mcavernosa10389
## 13 Mcavernosa04519
## 14 Mcavernosa09839
## 15 Mcavernosa03538
## 16 Mcavernosa10382
## 17 Mcavernosa13113
## 18 Mcavernosa22769
## 19 Mcavernosa08569
## 20 Mcavernosa03796
## 21 Mcavernosa00734
## 22 Mcavernosa00547
## 23 Mcavernosa27949
## 24 Mcavernosa25318
## 25 Mcavernosa01767
## 26 Mcavernosa24519
## 27 Mcavernosa13828
## 28 Mcavernosa04297
## 29 Mcavernosa00920
## 30 Mcavernosa05217
## 31 Mcavernosa13202
## 32 Mcavernosa19193
## 33 Mcavernosa21783
## 34 Mcavernosa15354
## 35 Mcavernosa25626
## 36 Mcavernosa13875
## 37 Mcavernosa30546
## 38 Mcavernosa15521
## 39 Mcavernosa32338
## 40 Mcavernosa10380
## 41 Mcavernosa35232
## 42 Mcavernosa08291
## 43 Mcavernosa16961
## 44 Mcavernosa29636
#Mcavernosa23377
plotCounts(hs.dds, gene = "Mcavernosa23377", intgroup = c("sym", "trt"))
# no good annotations for this protein.
#Mcavernosa23531
plotCounts(hs.dds, gene = "Mcavernosa23531", intgroup = c("sym", "trt"))
# has a BRICHOS domain, which may have something to do with post-translational modification
#Mcavernosa01063
plotCounts(hs.dds, gene = "Mcavernosa01063", intgroup = c("sym", "trt"))
#A toxin in nematocysts. Kalicludines and kaliseptine
#Mcavernosa01062
plotCounts(hs.dds, gene = "Mcavernosa01062", intgroup = c("sym", "trt"))
# Transmembrane protein with extracellular Kunitz domain/trypsin inhibitor
# Trypsin is an enzyme that breaks things down as part of digestion... arrested phagosome?
#Mcavernosa00334
plotCounts(hs.dds, gene = "Mcavernosa00334", intgroup = c("sym", "trt"))
# RNA processing?
#Mcavernosa05961
plotCounts(hs.dds, gene = "Mcavernosa05961", intgroup = c("sym", "trt"))
#Enkurin -- could be localized to cilia? Sigg et al.
#Mcavernosa01259
plotCounts(hs.dds, gene = "Mcavernosa01259", intgroup = c("sym", "trt"))
# hsp molecular chaperone DnaJ domain
#Mcavernosa08323
plotCounts(hs.dds, gene = "Mcavernosa08323", intgroup = c("sym", "trt"))
# Peroxiredoxin - Thioredoxin-like - response to oxidative stress
# See Maor-Landaw and Levy 2016 for analysis of expression in corals
#Mcavernosa32478
plotCounts(hs.dds, gene = "Mcavernosa32478", intgroup = c("sym", "trt"))
# kintoun - cilia protein?
# kintoun-like protein found to be SNP outlier in corals adapted to hot Palau lagoons (H Rivera dissertation)
# could function as stress response chaperone (von Morgen et al.)
#Mcavernosa05331
plotCounts(hs.dds, gene = "Mcavernosa05331", intgroup = c("sym", "trt"))
#BMP-binding endothelial regulator protein - crossveinless2. lots of different domains on InterPro...
#Mcavernosa10389
plotCounts(hs.dds, gene = "Mcavernosa10389", intgroup = c("sym", "trt"))
#Major family superfamily (MFS transporter) domain containing protein
# Transports small solutes. may transport G3P
# Sproles et al. -- MFS transporters in cnidarian dinoflagellate symbiosis
#Mcavernosa04519
plotCounts(hs.dds, gene = "Mcavernosa04519", intgroup = c("sym", "trt"))
# Copper-transporting ATPase
# Mcavernosa09839
plotCounts(hs.dds, gene = "Mcavernosa09839", intgroup = c("sym", "trt"))
# tyrosine phosphatase
# Mcavernosa03538
plotCounts(hs.dds, gene = "Mcavernosa03538", intgroup = c("sym", "trt"))
# no annotations, signal peptide domain and non-cytoplasmic domain on INterPro
# Mcavernosa10382
plotCounts(hs.dds, gene = "Mcavernosa10382", intgroup = c("sym", "trt"))
# protein prune homolog
# interacts with pro and anti apoptotic proteins (InterPro)
#Mcavernosa13113
plotCounts(hs.dds, gene = "Mcavernosa13113", intgroup = c("sym", "trt"))
# serine incorporator
# Mcavernosa29636
plotCounts(hs.dds, gene = "Mcavernosa29636", intgroup = c("sym", "trt"))
# leucine-rich repeat-containing protein
#Mcavernosa16961
plotCounts(hs.dds, gene = "Mcavernosa16961", intgroup = c("sym", "trt"))
# protein-protein interactions
#Mcavernosa08291
plotCounts(hs.dds, gene = "Mcavernosa08291", intgroup = c("sym", "trt"))
# "serologically defined colon cancer antigen"
# aka ENTR1 - endosome-associated-trafficking regulator 1
# without it, TNF receptor expression is reduced, resulting in resistance to TNF-induced apoptosis
# and also protein trafficking and secretion decreased. (Neznanov et al. 2005)
#Mcavernosa35232
plotCounts(hs.dds, gene = "Mcavernosa35232", intgroup = c("sym", "trt"))
# no blast hits or annotations
# mobidb disoarder prediction
#Mcavernosa10380
plotCounts(hs.dds, gene = "Mcavernosa10380", intgroup = c("sym", "trt"))
#hsp20-like chaperone, calcyclin-binding,
# interacts with SIAH, an E3 ubiquitin ligase with role in apoptosis
#Mcavernosa32338
plotCounts(hs.dds, gene = "Mcavernosa32338", intgroup = c("sym", "trt"))
# Ras-related protein Rab33, GTPase activity
# GTPases control assembly of vesicle coats for transport vesicles
#Mcavernosa15521
plotCounts(hs.dds, gene = "Mcavernosa15521", intgroup = c("sym", "trt"))
#BRO1 domain - vacuoles and lysosomes targeting - interpro
# programmed cell death interacting protein - blast
# Li et al. Sci Adv -- DE in symbiosis establishment/breakdown aiptasia
# Mcavernosa30546
plotCounts(hs.dds, gene = "Mcavernosa30546", intgroup = c("sym", "trt"))
# CAAX prenyl protease -- protein degradation?
# Kirk et al. 2018 Mol Ecol
# Mcavernosa13875
plotCounts(hs.dds, gene = "Mcavernosa13875", intgroup = c("sym", "trt"))
# DNA-binding, transcription factor
# Mcavernosa25626
plotCounts(hs.dds, gene = "Mcavernosa25626", intgroup = c("sym", "trt"))
#Mcavernosa15354
plotCounts(hs.dds, gene = "Mcavernosa15354", intgroup = c("sym", "trt"))
# Mcavernosa21783
plotCounts(hs.dds, gene = "Mcavernosa21783", intgroup = c("sym", "trt"))
# peroxisomal membrane protein
# peroxisomes involved in reduction of hydrogen peroxide / oxidative stress
# Test for differences between D and C corals in the heated treatment
Dheat.Cheat <- results(hs.dds, contrast=c("symtrt", "D.heat", "C.heat"))
# Subset DE genes with adjusted p-value < 0.1
Dheat.Cheat.sig <- Dheat.Cheat[which(Dheat.Cheat$padj < 0.1 ), ]
Dheat.Cheat.p05 <- Dheat.Cheat[which(Dheat.Cheat$pvalue < 0.05), ]
# Get names of DE genes
Dheat.Cheat.DEgenes <- rownames(Dheat.Cheat.sig)
# Summarize DE
as.data.frame(Dheat.Cheat) %>%
filter(padj < 0.1) %>%
summarise(total = n(),
up = sum(log2FoldChange > 0),
down = sum(log2FoldChange < 0)) %>%
knitr::kable(caption = "Differential expression")
| total | up | down |
|---|---|---|
| 1 | 1 | 0 |
# Make a heatmap
phm(Dheat.Cheat.sig, vsd2, "trt", "heat", "sym")
Only three genes are differentially expressed between D and C corals under heat stress. Not enough genes to run a GO enrichment analysis.
# Test for differences between C corals in the control vs. heated treatment
Cheat.Cctrl <- results(hs.dds, contrast=c("symtrt", "C.heat", "C.ctrl"))
# Subset DE genes with adjusted p-value < 0.1
Cheat.Cctrl.sig <- Cheat.Cctrl[which(Cheat.Cctrl$padj < 0.1 ), ]
Cheat.Cctrl.p05 <- Cheat.Cctrl[which(Cheat.Cctrl$pvalue < 0.05), ]
# Get names of DE genes
Cheat.Cctrl.DEgenes <- rownames(Cheat.Cctrl.sig)
# Summarize DE
as.data.frame(Cheat.Cctrl) %>%
filter(padj < 0.1) %>%
summarise(total = n(),
up = sum(log2FoldChange > 0),
down = sum(log2FoldChange < 0)) %>%
knitr::kable(caption = "Differential expression")
| total | up | down |
|---|---|---|
| 93 | 37 | 56 |
# Make a heatmap
phm(Cheat.Cctrl.sig, vsd2, "sym", "C", "trt") # use vsd2 for the normcounts with batch (colony) effect removed
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis
# Generate logP values
Cheat.Cctrl$logP <- -log10(Cheat.Cctrl$pvalue) * sign(Cheat.Cctrl$log2FoldChange)
# Write to file
setwd("code/GO_MWU")
write.table(data.frame(gene=rownames(Cheat.Cctrl), logP=Cheat.Cctrl$logP),
file = "Cheat.Cctrl.logP.txt",
row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
commandArgs <- function(...) c("Cheat.Cctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 44 GO terms at 10% FDR
# Plot GO_MWU results
setwd("code/GO_MWU")
results <- gomwuPlot(inFile = "Cheat.Cctrl.logP.txt",
goAnnotations = "Mcavernosa_gene2go.txt",
goDivision = "BP",
level1 = 0.05, level2 = 0.025, level3 = 0.01, txtsize = 1.2, treeHeight = 0.5)
## GO terms dispayed: 39
## "Good genes" accounted for: 97 out of 187 ( 52% )
# save text representation of results to file
# save(results, file = "Cheat.Cctrl.GO_MWU.results.RData")
Red GO terms are significantly upregulated in heated vs. control C corals. Blue terms are significantly downregulated.
# Test for differences between D corals in the control vs. heated treatment
Dheat.Dctrl <- results(hs.dds, contrast=c("symtrt", "D.heat", "D.ctrl"))
# Subset DE genes with adjusted p-value < 0.1
Dheat.Dctrl.sig <- Dheat.Dctrl[which(Dheat.Dctrl$padj < 0.1 ), ]
Dheat.Dctrl.p05 <- Dheat.Dctrl[which(Dheat.Dctrl$pvalue < 0.05), ]
# Get names of DE genes
Dheat.Dctrl.DEgenes <- rownames(Dheat.Dctrl.sig)
# Summarize DE
as.data.frame(Dheat.Dctrl) %>%
filter(padj < 0.1) %>%
summarise(total = n(),
up = sum(log2FoldChange > 0),
down = sum(log2FoldChange < 0)) %>%
knitr::kable(caption = "Differential expression")
| total | up | down |
|---|---|---|
| 35 | 26 | 9 |
# Make a heatmap
phm(Dheat.Dctrl.sig, vsd2, "sym", "D", "trt")
## Use GO_MWU to look at function of DE genes
# Only run this once -- once output files generated, no need to rerun analysis
# Generate logP values
Dheat.Dctrl$logP <- -log10(Dheat.Dctrl$pvalue) * sign(Dheat.Dctrl$log2FoldChange)
# Write to file
setwd("code/GO_MWU")
write.table(data.frame(gene=rownames(Dheat.Dctrl), logP=Dheat.Dctrl$logP),
file = "Dheat.Dctrl.logP.txt",
row.names = FALSE, quote = FALSE, sep = ",")
# Run GO_MWU
commandArgs <- function(...) c("Dheat.Dctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 11 GO terms at 10% FDR
# Plot GO_MWU results
setwd("code/GO_MWU")
results <- gomwuPlot(inFile = "Dheat.Dctrl.logP.txt",
goAnnotations = "Mcavernosa_gene2go.txt",
goDivision = "BP",
level1 = 0.05, level2 = 0.025, level3 = 0.01, txtsize = 1.2, treeHeight = 0.5)
## GO terms dispayed: 7
## "Good genes" accounted for: 34 out of 180 ( 19% )
# save text representation of results to file
# save(results, file = "Dheat.Dctrl.GO_MWU.results.RData")
Red GO terms are significantly upregulated in heated vs. control D corals. Blue terms are significantly downregulated.
dc.cc <- rownames(counts(hs.dds)) %in% Dctrl.Cctrl.DEgenes
dh.ch <- rownames(counts(hs.dds)) %in% Dheat.Cheat.DEgenes
ch.cc <- rownames(counts(hs.dds)) %in% Cheat.Cctrl.DEgenes
dh.dc <- rownames(counts(hs.dds)) %in% Dheat.Dctrl.DEgenes
a <- cbind(dc.cc, ch.cc, dh.dc)
vennDiagram(vennCounts(a))
# a <- cbind(ch.cc, dh.dc)
# vennDiagram(vennCounts(a))
#
# a <- cbind(dh.ch, dc.cc)
# vennDiagram(vennCounts(a))
#
# a <- cbind(dc.cc, ch.cc)
# vennDiagram(vennCounts(a))
# Subset DEGs as padj < 0.1
DEGs.dds <- vsd2[ rownames(vsd2) %in% c(Dctrl.Cctrl.DEgenes, Dheat.Cheat.DEgenes,
Cheat.Cctrl.DEgenes, Dheat.Dctrl.DEgenes), ]
# Subset DEGs as raw p < 0.05
# DEGs.dds <- vsd2[ rownames(vsd2) %in% c(
# rownames(Dctrl.Cctrl.p05), rownames(Dheat.Cheat.p05),
# rownames(Cheat.Cctrl.p05), rownames(Dheat.Dctrl.p05)), ]
## Run PCA and return data to visualize with ggplot
# pcaData <- plotPCA(DEGs.dds, intgroup = c( "colony", "sym", "trt"), returnData = TRUE)
# percentVar <- round(100 * attr(pcaData, "percentVar"))
## Plot PCA
# ggplot(pcaData, aes(x = PC1, y = PC2, color = colony, shape = sym:trt)) +
# geom_point(alpha = 0.5) +
# labs(title = "PCA of normalized gene expression") +
# xlab(paste0("PC1: ", percentVar[1], "% variance")) +
# ylab(paste0("PC2: ", percentVar[2], "% variance"))
## Calculate Euclidean distances among samples
sampleDists <- dist(t(assay(DEGs.dds)))
sampleDistMatrix <- as.matrix( sampleDists )
## PCoA
mds <- as.data.frame(colData(DEGs.dds)) %>%
cbind(cmdscale(sampleDistMatrix, k = 2))
# Create convex hulls to plot around groups
Cc.hull <- mds[mds$sym == "C" & mds$trt == "ctrl", ][chull(mds[mds$sym == "C" & mds$trt == "ctrl", c("1", "2")]), ]
Dc.hull <- mds[mds$sym == "D" & mds$trt == "ctrl", ][chull(mds[mds$sym == "D" & mds$trt == "ctrl", c("1", "2")]), ]
Ch.hull <- mds[mds$sym == "C" & mds$trt == "heat", ][chull(mds[mds$sym == "C" & mds$trt == "heat", c("1", "2")]), ]
Dh.hull <- mds[mds$sym == "D" & mds$trt == "heat", ][chull(mds[mds$sym == "D" & mds$trt == "heat", c("1", "2")]), ]
hull.data <- rbind(Cc.hull, Dc.hull, Ch.hull, Dh.hull) #combine grp.a and grp.b
# Plot PCoA
ggplot(mds, aes(x = `1`, y = -`2`, shape = sym:trt)) +
geom_point(alpha = 0.5) + coord_fixed() +
geom_polygon(data = hull.data, aes(x = `1`, y = -`2`, fill = sym:trt, group = sym:trt), alpha = 0.30) +
labs(title = "PCoA of DEGs")
Other coral genotypes were bleached but did not shuffle their symbionts – they had clade C to begin with, and clade C after recovery. We can use these samples as controls to test whether the change in gene expression in corals that shuffle is attributable to the symbiont change or the previous stress.
# Colonies EXCEPT 20, 22, and 26
# Only time point at end of heat stress (but these were all at ambient - no hs)
ns.dds <- dds[, !colData(dds)$colony %in% c(20, 22, 26) & colData(dds)$date_sampled == "2017-10-28"]
# Add design formula for heat stress experiment and DROP UNUSED FACTOR LEVELS!
design(ns.dds) <- formula(~ colony + trt1)
colData(ns.dds) <- droplevels(colData(ns.dds))
# Run DESeq pipeline
ns.dds <- DESeq(ns.dds)
# Find DE genes between prev. bleached vs. control (trt1)
res <- results(ns.dds, contrast = c("trt1", "b", "c"))
summary(res) ### No differentially expressed genes!
##
## out of 17018 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
No genes are differentially expressed in corals that bleached and recovered with the same symbionts vs. those that never bleached. This suggests transcriptomes have ‘recovered’ from the initial bleaching treatment, and that the genes that are differentially expressed in colonies that shuffled from C to D are actuially due to the change in symbionts after bleaching, not the bleaching itself.